import pandas
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder



# Load the movie records used for training.
data = pd.read_csv('movieListReal.csv')

# One encoder per categorical column; both stay at module level so that
# predict_boxOffice can reuse the fitted label mappings.
le_type, le_country = LabelEncoder(), LabelEncoder()

# Replace each categorical column with the integer codes from its encoder.
data['type'] = le_type.fit_transform(data['type'])
data['country'] = le_country.fit_transform(data['country'])

# Feature matrix and regression target.
X = data[['type', 'country', 'firstBoxOffice']]
y = data['allBoxOffice']

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Train a 100-tree random forest (fit() returns the estimator itself).
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predictions for the held-out rows; nothing in the visible code consumes them.
y_pred = model.predict(X_test)


def predict_boxOffice(movie_type, country, first_boxOffice):
    """Predict the ``allBoxOffice`` value for a single movie.

    Args:
        movie_type: Raw (un-encoded) value of the ``type`` column; it is
            encoded with the module-level fitted ``le_type``.
        country: Raw (un-encoded) value of the ``country`` column; it is
            encoded with the module-level fitted ``le_country``.
        first_boxOffice: Value for the ``firstBoxOffice`` feature, given as
            a number or a numeric string.

    Returns:
        The model's prediction for the single input row.

    Raises:
        ValueError: If ``first_boxOffice`` cannot be converted to a number,
            or if ``LabelEncoder.transform`` rejects a label that was not
            seen when the encoders were fitted.
    """
    encoded_type = le_type.transform([movie_type])[0]
    encoded_country = le_country.transform([country])[0]

    # Coerce explicitly so numeric strings such as '32626' are accepted
    # instead of producing a string-typed feature array.
    try:
        first_box_office_value = float(first_boxOffice)
    except (TypeError, ValueError) as err:
        raise ValueError(
            f"first_boxOffice must be numeric, got {first_boxOffice!r}"
        ) from err

    # The model was fitted on a DataFrame, so pass a DataFrame with the same
    # column names and order; a bare nested list drops the feature names and
    # makes scikit-learn warn about missing feature names.
    input_features = pd.DataFrame(
        [[encoded_type, encoded_country, first_box_office_value]],
        columns=['type', 'country', 'firstBoxOffice'],
    )

    # Run the prediction and unwrap the single result.
    return model.predict(input_features)[0]


# Example usage (both labels must be values the encoders were fitted on):
# predicted_value = predict_boxOffice('动作', '美国', 32626)
# print(predicted_value)